import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
reviews = pd.read_excel('reviews.xlsx',engine='openpyxl')
reviews.head()
| | Review Text | Rating |
|---|---|---|
| 0 | 3 tags sewn in, 2 small (about 1'' long) and 1... | 1 |
| 1 | I was really hoping to like this, but it did n... | 1 |
| 2 | I usually wear a medium and bought a small. it... | 1 |
| 3 | I was very excited to order this top in red xs... | 1 |
| 4 | This is so thin and poor quality. especially f... | 1 |
reviews.shape
(1405, 2)
reviews.isna().any()
Review Text True Rating False dtype: bool
sum(reviews['Review Text'].isna())
49
There are 49 reviews with missing review text.
# Drop the rows that contain NaN (the reviews whose text is missing)
reviews.dropna(inplace=True)
reviews.shape
(1356, 2)
reviews.Rating.value_counts()
4 390 5 377 1 293 2 198 3 98 Name: Rating, dtype: int64
# Bar chart of how many reviews received each rating.
rating_counts = reviews.Rating.value_counts()
plt.bar(rating_counts.index, rating_counts.values)
plt.xlabel("Rating")
plt.ylabel("Rating counts")
plt.title("Total Rating counts of clothing reviews")
plt.show()
Ratings 4 and 5 are classified as 1 (high) and ratings 1 and 2 as 0 (low); rating 3 is disregarded.
# Disregard rating 3: keep only the reviews rated 1, 2, 4 or 5.
# .copy() makes the filtered result an independent DataFrame, so adding new
# columns to it later does not raise pandas' SettingWithCopyWarning.
reviews = reviews[reviews['Rating'].isin([1, 2, 4, 5])].copy()
reviews.shape
(1258, 2)
reviews.Rating.value_counts()
4 390 5 377 1 293 2 198 Name: Rating, dtype: int64
# Map ratings 1 and 2 to 0 (low) and ratings 4 and 5 to 1 (high)
reviews['Target'] = reviews.Rating.map({1:0,2:0,4:1,5:1})
reviews.Target.value_counts()
1 767 0 491 Name: Target, dtype: int64
reviews.Target.value_counts().plot(kind='bar')
plt.ylabel('counts')
plt.title('Counts of low and high ratings')
Text(0.5, 1.0, 'Counts of low and high ratings')
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
import nltk
# Lemmatizing with POS Tag
def get_wordnet_pos(word):
    """Return the WordNet POS constant that ``lemmatize()`` should use for *word*.

    The word is tagged on its own with ``nltk.pos_tag``. The first letter of
    the resulting tag selects adjective (J), verb (V) or adverb (R); every
    other tag, including noun tags, resolves to ``wordnet.NOUN``.
    """
    pos_tag = nltk.pos_tag([word])[0][1]
    initial = pos_tag[0].upper()
    if initial == "J":
        return wordnet.ADJ
    if initial == "V":
        return wordnet.VERB
    if initial == "R":
        return wordnet.ADV
    # "N" and any unrecognised tag both map to noun.
    return wordnet.NOUN
def text_preprocessing(review):
    """Clean a single review and return it as a space-separated string of lemmas.

    Steps, in order:
      1. Tokenize on runs of word characters, which strips punctuation.
      2. Keep only purely alphabetic tokens, lowercased, that are not
         English stop words.
      3. Lemmatize each remaining token with the POS returned by
         ``get_wordnet_pos``.
      4. Drop lemmas shorter than three characters.

    Args:
        review: The raw review text (a string).

    Returns:
        The cleaned review as one string; empty if no token survives.
    """
    # Build the stop-word lookup once per review: fetching the list for every
    # token is loop-invariant work, and a set gives O(1) membership tests.
    stop_words = set(stopwords.words('english'))

    # Tokenizing on word characters removes punctuation.
    tokenizer = nltk.RegexpTokenizer(r"\w+")

    new_words = []
    for item in tokenizer.tokenize(review):
        # isalpha() discards tokens containing digits or underscores.
        if not item.isalpha():
            continue
        lowered = item.lower()
        if lowered not in stop_words:
            new_words.append(lowered)

    # Lemmatize every kept word with its own POS tag.
    lemmatizer = WordNetLemmatizer()
    lemmatized_words = [
        lemmatizer.lemmatize(word, get_wordnet_pos(word)) for word in new_words
    ]

    # Keep only lemmas with at least three characters.
    return ' '.join(word for word in lemmatized_words if len(word) >= 3)
reviews['Clean Text'] = reviews['Review Text'].apply(text_preprocessing)
reviews
| | Review Text | Rating | Target | Clean Text |
|---|---|---|---|---|
| 0 | 3 tags sewn in, 2 small (about 1'' long) and 1... | 1 | 0 | tag sewn small long huge itchy cut thread left... |
| 1 | I was really hoping to like this, but it did n... | 1 | 0 | really hop like look way model least sharkbite... |
| 2 | I usually wear a medium and bought a small. it... | 1 | 0 | usually wear medium bought small fit shape fla... |
| 3 | I was very excited to order this top in red xs... | 1 | 0 | excite order top red cute huge shapeless suppo... |
| 4 | This is so thin and poor quality. especially f... | 1 | 0 | thin poor quality especially price felt like t... |
| ... | ... | ... | ... | ... |
| 1400 | I bought it in the cream color and loved it so... | 5 | 1 | bought cream color love much bought black supe... |
| 1401 | This sweater is like a giant hug. i wore it to... | 5 | 1 | sweater like giant hug wore today receive many... |
| 1402 | This is a perfect saturday t shirt for the col... | 5 | 1 | perfect saturday shirt colder day great fit fo... |
| 1403 | I can never resist a peter pan collar, so i bo... | 5 | 1 | never resist peter pan collar bought top ivory... |
| 1404 | This is absolutely adorable and so flattering!... | 5 | 1 | absolutely adorable flatter pattern shirt purc... |
1258 rows × 4 columns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
# Create the TF-IDF vectorizer
tfidf = TfidfVectorizer()
# Fit on the cleaned reviews and encode them as a sparse document-term matrix
csr_mat = tfidf.fit_transform(reviews['Clean Text'])
# Print the matrix in dense form
print(csr_mat.toarray())
# Get the vocabulary terms. get_feature_names() was deprecated in
# scikit-learn 1.0 and removed in 1.2, so use get_feature_names_out() when it
# exists and fall back to the old method on older releases. .tolist() keeps
# `words` a plain list of strings, as before.
if hasattr(tfidf, "get_feature_names_out"):
    words = tfidf.get_feature_names_out().tolist()
else:
    words = tfidf.get_feature_names()
# Print the vocabulary
print(words)
[[0. 0. 0. ... 0. 0. 0.] [0. 0. 0. ... 0. 0. 0.] [0. 0. 0. ... 0. 0. 0.] ... [0. 0. 0. ... 0. 0. 0.] [0. 0. 0. ... 0. 0. 0.] [0. 0. 0. ... 0. 0. 0.]] ['ability', 'able', 'absolute', 'absolutely', 'absurd', 'accent', 'accentuate', 'acceptable', 'accepts', 'access', 'accessorize', 'accessory', 'accidentally', 'accommodate', 'accord', 'accurate', 'accurately', 'across', 'acrylic', 'act', 'activity', 'actual', 'actuality', 'actually', 'add', 'addition', 'additionally', 'aded', 'adequate', 'adhesive', 'adjust', 'adjustable', 'admire', 'admit', 'adn', 'adopt', 'adorable', 'adore', 'advertised', 'advice', 'aesthetic', 'afraid', 'age', 'ago', 'agree', 'ahead', 'air', 'airiness', 'airy', 'ala', 'alaska', 'alignment', 'allergy', 'allow', 'allows', 'almost', 'alone', 'along', 'already', 'also', 'alst', 'alter', 'alteration', 'alternation', 'alternative', 'although', 'always', 'amalfi', 'amaze', 'amazingly', 'amelia', 'amount', 'amp', 'ample', 'angeles', 'angle', 'animal', 'ankle', 'anna', 'annoy', 'annoyed', 'anoth', 'another', 'antebellum', 'antho', 'anthto', 'anti', 'anticipate', 'antique', 'antro', 'anxious', 'anymore', 'anyone', 'anything', 'anyway', 'anyways', 'anywhere', 'apart', 'apparent', 'apparently', 'appeal', 'appear', 'appearance', 'appreciate', 'approach', 'appropriate', 'appropriately', 'approx', 'approximately', 'aprts', 'apt', 'aqua', 'arctic', 'area', 'arm', 'armhole', 'armpit', 'army', 'around', 'arrival', 'arrive', 'article', 'artsy', 'asap', 'asia', 'asian', 'aside', 'ask', 'aske', 'aspect', 'asscoaites', 'associate', 'assume', 'astound', 'asymmetrical', 'athletic', 'atlanta', 'atleast', 'atrocious', 'attach', 'attempt', 'attention', 'attire', 'attitude', 'attract', 'attractive', 'autobots', 'autumn', 'autumnal', 'available', 'average', 'avoid', 'await', 'aware', 'away', 'awesome', 'awful', 'awfully', 'awhile', 'awkward', 'awkwardly', 'baby', 'babydoll', 'back', 'backless', 'backpack', 'backpacker', 'backside', 'bad', 'badly', 'baffle', 'bag', 'bagg', 
'baggie', 'baggier', 'bagginess', 'bagging', 'baggy', 'balance', 'ball', 'ballet', 'balloon', 'band', 'bandeau', 'bare', 'barely', 'barnyard', 'base', 'basic', 'basically', 'basket', 'batch', 'bathing', 'bathroom', 'bbq', 'beach', 'bead', 'beading', 'beadwork', 'beam', 'beautiful', 'beautifully', 'beauty', 'becaus', 'become', 'bed', 'bedelia', 'beef', 'befo', 'begin', 'behind', 'beige', 'believe', 'bell', 'belling', 'belly', 'belong', 'belt', 'bendable', 'beneath', 'benefit', 'besides', 'best', 'bestow', 'beware', 'beyond', 'bib', 'bicycle', 'big', 'bikini', 'bill', 'billow', 'billowy', 'binding', 'bingo', 'bit', 'bizarre', 'black', 'blah', 'bland', 'blanket', 'blazer', 'bleach', 'bleed', 'blend', 'blk', 'blonde', 'blood', 'bloom', 'blotchy', 'blouse', 'blousy', 'blue', 'bluish', 'blush', 'boarder', 'boat', 'bod', 'bodice', 'body', 'bodycon', 'bodysuit', 'bohemian', 'boho', 'boil', 'bomber', 'bone', 'boney', 'bonus', 'boo', 'boob', 'book', 'boost', 'boot', 'booty', 'bordeaux', 'boring', 'bother', 'bothersome', 'bottom', 'boucle', 'bough', 'bought', 'bountiful', 'bow', 'box', 'boxier', 'boxy', 'boy', 'boyfriend', 'bozo', 'bra', 'braid', 'brainer', 'braless', 'bralette', 'bralettes', 'brand', 'breaker', 'breast', 'breath', 'breathable', 'breathe', 'breathing', 'breeze', 'breezy', 'brick', 'bright', 'brightens', 'brighter', 'brilliant', 'bring', 'british', 'broad', 'brocade', 'broke', 'brought', 'brown', 'brownish', 'brunch', 'brunette', 'btw', 'bubble', 'buck', 'buckle', 'budget', 'build', 'building', 'built', 'bulge', 'bulk', 'bulkier', 'bulky', 'bullet', 'bum', 'bummer', 'bump', 'bumped', 'bunch', 'bunching', 'burgundy', 'burnt', 'business', 'bust', 'bustline', 'busty', 'busy', 'butt', 'butter', 'butterfly', 'buttock', 'button', 'buttondown', 'buttt', 'buy', 'buyer', 'buying', 'byron', 'caftan', 'cage', 'cake', 'calf', 'california', 'call', 'camel', 'cami', 'camisol', 'camisole', 'camo', 'camoflage', 'camp', 'campole', 'camsiole', 'candy', 'cannot', 'canopy', 
'cant', 'canvas', 'cap', 'cape', 'capri', 'captain', 'car', 'caramel', 'carbon', 'card', 'cardi', 'cardigan', 'cardinal', 'care', 'careful', 'carefully', 'cargo', 'carry', 'cartonnier', 'case', 'cashmere', 'cast', 'casual', 'casually', 'catalog', 'catch', 'category', 'caught', 'causal', 'cause', 'cautious', 'caveat', 'cedar', 'cent', 'center', 'certain', 'certainly', 'chair', 'challenge', 'chambray', 'chance', 'change', 'character', 'charcoal', 'charge', 'charlie', 'charm', 'chart', 'che', 'cheap', 'cheapens', 'cheaper', 'cheapish', 'cheaply', 'check', 'checker', 'cheek', 'cheerful', 'chemical', 'chemise', 'chest', 'chested', 'chevron', 'chic', 'chiffon', 'child', 'chillier', 'chilly', 'china', 'chino', 'chloe', 'choice', 'choke', 'choker', 'choose', 'chose', 'christmas', 'chrolox', 'chubbiness', 'chunky', 'church', 'cinch', 'cinque', 'circa', 'circulation', 'city', 'clasp', 'clasped', 'class', 'classic', 'classy', 'clay', 'cld', 'clean', 'cleaning', 'clear', 'clearance', 'clearly', 'cleavage', 'cleverly', 'climate', 'cling', 'clingy', 'clip', 'clipped', 'close', 'closely', 'closer', 'closet', 'closure', 'cloth', 'clothes', 'clothing', 'clown', 'clung', 'coarse', 'coast', 'coat', 'cocktail', 'cocoon', 'cold', 'colder', 'collar', 'collarless', 'collect', 'collection', 'college', 'color', 'colorful', 'colour', 'combination', 'combine', 'combo', 'come', 'comfiest', 'comfort', 'comfortable', 'comfortably', 'comfy', 'comically', 'comment', 'common', 'compaint', 'company', 'compare', 'comparison', 'compel', 'compete', 'complain', 'complaint', 'complement', 'complementary', 'complete', 'completely', 'complexion', 'compliment', 'compose', 'computer', 'con', 'conceal', 'concept', 'concern', 'concerned', 'concert', 'condition', 'cone', 'confident', 'conform', 'confortable', 'confuse', 'conscience', 'conscious', 'conservative', 'consider', 'considerable', 'considerably', 'consideration', 'consistent', 'consistently', 'constant', 'constrict', 'construct', 'construction', 
'contact', 'contains', 'contemporary', 'contender', 'continue', 'continuous', 'continuously', 'contortionist', 'contrast', 'contri', 'contributer', 'control', 'convince', 'cool', 'cooler', 'coral', 'corner', 'correct', 'correctly', 'cost', 'costume', 'cotton', 'cottony', 'could', 'couldnt', 'couldve', 'count', 'countless', 'couple', 'coupon', 'course', 'cover', 'coverage', 'cowl', 'cozy', 'craft', 'craftsmanship', 'crave', 'crazy', 'cream', 'creamy', 'crease', 'create', 'creates', 'creation', 'creative', 'creepy', 'crisp', 'criticism', 'cro', 'crochet', 'crooked', 'crop', 'cropped', 'cross', 'crotch', 'cry', 'cuff', 'culotte', 'cup', 'cupcake', 'curatorial', 'curious', 'curl', 'current', 'currently', 'curtain', 'curve', 'curvey', 'curvier', 'curvy', 'cushy', 'customer', 'cut', 'cute', 'cuter', 'cutest', 'cutesy', 'cutout', 'cycle', 'daily', 'dainty', 'damage', 'damp', 'dance', 'dapper', 'dark', 'darker', 'darkler', 'darn', 'dart', 'date', 'daughte', 'daughter', 'day', 'daydream', 'daytime', 'ddd', 'ddresses', 'deal', 'dealt', 'dear', 'death', 'debate', 'deceive', 'decent', 'decide', 'decision', 'decorative', 'deep', 'deeper', 'deeply', 'defect', 'defectiv', 'defective', 'define', 'definetly', 'definite', 'definitely', 'definition', 'degree', 'deletta', 'delicate', 'delicateness', 'delight', 'deliver', 'demure', 'denim', 'denimy', 'dense', 'deodorant', 'department', 'depend', 'depict', 'depictes', 'depiction', 'derby', 'derriere', 'describe', 'described', 'description', 'design', 'designer', 'desire', 'despite', 'destroys', 'detail', 'detailed', 'deter', 'detergent', 'detract', 'develop', 'developed', 'diameter', 'diamond', 'didnt', 'die', 'diet', 'difference', 'different', 'differently', 'differs', 'difficu', 'difficult', 'digress', 'dime', 'dimension', 'dimensional', 'din', 'dind', 'dinner', 'dip', 'dipped', 'direct', 'direction', 'directly', 'dirty', 'disagree', 'disappear', 'disappoint', 'disappointed', 'disappointingly', 'disappointment', 'disappoitned', 
'disaster', 'disastrously', 'discard', 'discount', 'discover', 'discrepancy', 'disgust', 'disintegrate', 'display', 'displayed', 'disppaointed', 'distinct', 'distinctive', 'distorts', 'distract', 'distress', 'distressed', 'ditto', 'dive', 'diver', 'division', 'dolan', 'doldrums', 'doll', 'dolman', 'dont', 'door', 'dot', 'double', 'doubt', 'down', 'downside', 'downward', 'drab', 'drag', 'dragonfly', 'dramatic', 'drape', 'draped', 'drapery', 'drapey', 'drawn', 'drawstring', 'dreadful', 'dream', 'dress', 'dressier', 'dressy', 'drew', 'drive', 'drool', 'droop', 'droopy', 'drop', 'drown', 'dry', 'dryable', 'dryer', 'due', 'dull', 'dumpy', 'durable', 'duster', 'dusty', 'dye', 'eagerly', 'earlier', 'early', 'earth', 'ease', 'easel', 'easily', 'east', 'easter', 'easy', 'eat', 'ecept', 'edge', 'edginess', 'edgy', 'edition', 'effect', 'effortless', 'ehm', 'either', 'ejans', 'elaborate', 'elastic', 'elasticized', 'elbow', 'elegance', 'elegant', 'elegantly', 'element', 'eleven', 'elongate', 'else', 'elsewhere', 'email', 'emb', 'embarrass', 'embellishment', 'emboss', 'embroider', 'embroidery', 'emerald', 'empire', 'employee', 'end', 'endo', 'endow', 'enhance', 'enhances', 'enjoy', 'enormous', 'enough', 'ensemble', 'enter', 'entire', 'entirely', 'envelope', 'environment', 'envision', 'eptite', 'equal', 'equally', 'equates', 'eregular', 'errand', 'error', 'especially', 'esque', 'essentially', 'estimation', 'etc', 'ethereal', 'eva', 'evanthe', 'eve', 'even', 'evening', 'evenly', 'event', 'eventually', 'ever', 'everthing', 'every', 'everyday', 'everyone', 'everythiing', 'everything', 'everywhere', 'exacerbate', 'exact', 'exactly', 'exaggerate', 'exaggerated', 'examination', 'exceed', 'excellent', 'except', 'exception', 'excess', 'exchange', 'excite', 'exclamation', 'exclusive', 'exclusively', 'excuse', 'execute', 'execution', 'exist', 'existent', 'expand', 'expect', 'expectation', 'expens', 'expensive', 'experience', 'expose', 'exposure', 'exquisite', 'extend', 'exterior', 'extra', 
'extraordinary', 'extremely', 'eye', 'eyelet', 'fab', 'fabric', 'fabulous', 'face', 'facebook', 'fact', 'factor', 'fade', 'fail', 'fails', 'fair', 'faire', 'fairisle', 'fairly', 'fake', 'fall', 'family', 'fan', 'fancier', 'fancy', 'fantasia', 'fantastic', 'far', 'farm', 'fashion', 'fashionable', 'fasten', 'fastener', 'faster', 'fat', 'fate', 'father', 'fault', 'faux', 'favor', 'favorable', 'favorite', 'fear', 'feature', 'february', 'fee', 'feedback', 'feel', 'fell', 'felt', 'female', 'feminie', 'feminine', 'femininity', 'fence', 'festive', 'fiance', 'fiber', 'fifth', 'figure', 'fill', 'filmy', 'finally', 'find', 'fine', 'finger', 'fingernail', 'finish', 'firm', 'first', 'firstly', 'fit', 'fitter', 'fitting', 'five', 'fix', 'flair', 'flamenco', 'flamingo', 'flannel', 'flap', 'flare', 'flash', 'flat', 'flatering', 'flatter', 'flatters', 'flattrering', 'flaw', 'fleece', 'flex', 'flexibility', 'flimsy', 'flip', 'flirtiness', 'flirty', 'floaty', 'floor', 'flop', 'floppy', 'floral', 'florals', 'florascura', 'floreat', 'florida', 'flouncier', 'flouncy', 'flour', 'flow', 'flower', 'flowiness', 'flowy', 'flutter', 'fold', 'foldover', 'follow', 'fool', 'foot', 'football', 'forehead', 'forest', 'forever', 'forgive', 'forgotten', 'form', 'formal', 'forth', 'fortunately', 'forward', 'found', 'four', 'fox', 'fragile', 'frame', 'france', 'franco', 'frankly', 'fray', 'frayed', 'free', 'french', 'fresh', 'friend', 'friendly', 'frigid', 'frock', 'front', 'frontal', 'frumpy', 'frustrate', 'full', 'fulle', 'fuller', 'fullness', 'fully', 'fun', 'functional', 'functionality', 'funky', 'funnel', 'funny', 'fur', 'furry', 'fuss', 'future', 'fuzz', 'fuzzy', 'fyi', 'gal', 'game', 'gap', 'gape', 'gaped', 'gappy', 'garish', 'garment', 'garner', 'gather', 'gathering', 'gauze', 'gauzy', 'gear', 'gegt', 'general', 'generally', 'generous', 'gentle', 'get', 'getaway', 'ghillie', 'giant', 'giantess', 'gift', 'ginormous', 'girl', 'girlie', 'girly', 'give', 'glad', 'glamorous', 'glass', 'glitter', 
'glittery', 'globally', 'glorify', 'glove', 'glue', 'god', 'goddess', 'gold', 'goldenrod', 'gonna', 'good', 'goodhyouman', 'goodness', 'goodwill', 'gorgeous', 'gotten', 'grab', 'grabbed', 'graceful', 'grade', 'grandma', 'grant', 'graphic', 'gray', 'grease', 'great', 'green', 'greenish', 'grey', 'groom', 'grossly', 'ground', 'group', 'grow', 'grrrrrrrrrrr', 'grunge', 'grungy', 'guaranteed', 'guess', 'gun', 'gut', 'haha', 'hair', 'half', 'halfway', 'halloween', 'halter', 'hand', 'handful', 'handle', 'handwoven', 'hang', 'hanger', 'happen', 'happens', 'happier', 'happy', 'hard', 'hardware', 'hat', 'hate', 'hawaii', 'head', 'heart', 'heather', 'heavier', 'heavily', 'heavy', 'heck', 'heed', 'heeks', 'heel', 'hefty', 'hei', 'height', 'held', 'help', 'helpful', 'hem', 'hemline', 'hesitant', 'hesitate', 'hesitation', 'hidden', 'hide', 'hideous', 'hiding', 'high', 'highlight', 'highly', 'hike', 'hinder', 'hint', 'hip', 'hit', 'hmm', 'hoffman', 'hoist', 'hokey', 'hold', 'hole', 'holiday', 'holly', 'home', 'honest', 'honestly', 'honeymoon', 'hoodie', 'hook', 'hop', 'hope', 'hopefully', 'horizontal', 'horrendous', 'horrible', 'horribly', 'horrify', 'horse', 'horseshoe', 'hot', 'hotel', 'hottest', 'hour', 'hourglass', 'house', 'houston', 'however', 'hsould', 'hte', 'htey', 'hubby', 'hug', 'huge', 'humid', 'humongous', 'hung', 'hungry', 'hunt', 'husband', 'hyphen', 'hypo', 'iand', 'ice', 'idea', 'ideal', 'iliked', 'ill', 'image', 'imagine', 'immediate', 'immediately', 'immodest', 'imo', 'impact', 'impeccable', 'imperfection', 'important', 'importantly', 'impossible', 'impractical', 'impressed', 'impression', 'improve', 'inaccurate', 'inappropriate', 'inch', 'include', 'inconsistent', 'increase', 'incredible', 'incredibly', 'indecent', 'indeed', 'indicates', 'indication', 'indifference', 'indigo', 'inexpensive', 'informal', 'information', 'initially', 'inner', 'inseam', 'inside', 'inspection', 'inspire', 'install', 'instantly', 'instead', 'instruction', 'instuctions', 'insulate', 
'insult', 'intarsia', 'intend', 'intent', 'intention', 'intentionally', 'interest', 'interested', 'intimate', 'intrepid', 'intricate', 'invent', 'inward', 'iris', 'iron', 'ish', 'isnt', 'issue', 'italy', 'itch', 'itchiness', 'itchy', 'item', 'iteration', 'ive', 'ivory', 'jack', 'jacket', 'jackpot', 'jagged', 'jammie', 'janes', 'jean', 'jersey', 'jessa', 'jetson', 'jewelry', 'jewlery', 'jkeep', 'jlo', 'job', 'jogger', 'jogging', 'jsut', 'judge', 'judging', 'judy', 'juice', 'july', 'jumped', 'jumper', 'jumpsuit', 'jungle', 'justice', 'justify', 'jut', 'kangaroo', 'keep', 'keeper', 'kelly', 'kept', 'key', 'keyhole', 'kid', 'kill', 'killer', 'kimono', 'kind', 'kinda', 'kiss', 'kit', 'knee', 'knew', 'knit', 'knot', 'know', 'kudos', 'label', 'lace', 'lack', 'lacy', 'lady', 'laid', 'land', 'landscape', 'lanky', 'lapel', 'large', 'largely', 'larkspur', 'lars', 'last', 'lastly', 'late', 'lately', 'later', 'laugh', 'launder', 'laundering', 'launders', 'laundry', 'lavender', 'lay', 'layer', 'layerng', 'layout', 'lazy', 'lbd', 'ldecide', 'leaf', 'lean', 'leaner', 'learn', 'least', 'leather', 'leave', 'lecture', 'left', 'leg', 'legging', 'lend', 'length', 'lengthwise', 'lengthy', 'less', 'lesson', 'let', 'level', 'levi', 'libs', 'lie', 'life', 'lifeless', 'lift', 'light', 'lighter', 'lightly', 'lightweight', 'lik', 'like', 'likely', 'limit', 'limited', 'limitless', 'line', 'linebacker', 'lineman', 'linen', 'liner', 'list', 'listen', 'listing', 'literally', 'littl', 'little', 'live', 'local', 'location', 'lol', 'long', 'longer', 'look', 'loom', 'loooooong', 'loop', 'loose', 'loosely', 'loosen', 'loosens', 'looser', 'los', 'lose', 'loser', 'loss', 'lot', 'lounge', 'loungewear', 'love', 'lovely', 'low', 'luck', 'lucked', 'luckily', 'lucky', 'lump', 'luv', 'luxurious', 'lyocell', 'machine', 'maeve', 'mail', 'main', 'mainly', 'maintain', 'major', 'majority', 'make', 'makeup', 'malfun', 'mammoth', 'man', 'manage', 'manner', 'manufacturer', 'many', 'mara', 'mark', 'marked', 'marled', 
'mary', 'mask', 'massive', 'massively', 'match', 'material', 'maternity', 'maternityish', 'mathced', 'matronly', 'matter', 'matvehd', 'mauve', 'max', 'maxi', 'may', 'maybe', 'maza', 'mcguire', 'mean', 'meant', 'measure', 'measurement', 'med', 'meda', 'mediocre', 'medium', 'mediumd', 'meet', 'meeting', 'meh', 'meleri', 'mellow', 'melon', 'men', 'mention', 'merely', 'mermaid', 'mesh', 'mess', 'messy', 'met', 'metal', 'metallic', 'mexico', 'michelin', 'mid', 'middl', 'middle', 'midi', 'midriff', 'midsection', 'midst', 'midway', 'midweight', 'might', 'mild', 'mildish', 'million', 'mind', 'mine', 'mini', 'minimal', 'minimally', 'minor', 'mint', 'mintue', 'minus', 'minute', 'miraculously', 'mirror', 'misalign', 'mislabeled', 'mislead', 'mismatch', 'misplace', 'misrepresent', 'miss', 'misshapen', 'mistake', 'mistaken', 'mix', 'mixed', 'mockneck', 'mod', 'model', 'modern', 'modest', 'modesty', 'modify', 'mom', 'moment', 'money', 'monitor', 'monstrosity', 'month', 'monthly', 'moo', 'mood', 'mor', 'morning', 'moss', 'mostly', 'mother', 'motif', 'move', 'moveable', 'movement', 'movie', 'msallet', 'much', 'mud', 'muffin', 'multiple', 'muscle', 'musclea', 'muscular', 'must', 'mustard', 'mute', 'muumuu', 'nail', 'name', 'narrow', 'narrower', 'natural', 'naturally', 'nauseate', 'nautical', 'navel', 'navy', 'near', 'nearly', 'neat', 'neatly', 'necessary', 'neck', 'necklace', 'neckli', 'neckline', 'necklne', 'need', 'needle', 'negative', 'neither', 'ness', 'net', 'neutral', 'never', 'new', 'next', 'nice', 'nicely', 'nicer', 'night', 'nightgown', 'nightmare', 'nightshirt', 'nightwear', 'nine', 'nip', 'nipple', 'nit', 'nod', 'non', 'none', 'nonetheless', 'nope', 'normal', 'normally', 'north', 'notch', 'note', 'nothing', 'notice', 'noticeable', 'nottom', 'nowhere', 'nude', 'number', 'numerically', 'numerous', 'nursing', 'nyc', 'nylon', 'oatmeal', 'objection', 'obscure', 'obsess', 'obvious', 'obviously', 'occasion', 'occurrence', 'odd', 'oddly', 'odor', 'offensive', 'offer', 
'offering', 'office', 'offset', 'often', 'oil', 'okay', 'oklahoma', 'old', 'olive', 'one', 'online', 'onto', 'oop', 'oops', 'opaque', 'open', 'opening', 'opinion', 'opportunity', 'oppose', 'opposite', 'opt', 'option', 'orange', 'order', 'ordere', 'ordinary', 'ore', 'original', 'originally', 'others', 'otherwise', 'othewise', 'out', 'outcome', 'outdoors', 'outer', 'outfit', 'outrageous', 'outsi', 'outside', 'outstanding', 'outwards', 'overall', 'overlay', 'overlook', 'overly', 'overpower', 'overprice', 'oversized', 'overtaken', 'overweight', 'overwhelm', 'overwhelmed', 'owend', 'own', 'owner', 'pack', 'package', 'packaging', 'pad', 'padderns', 'page', 'paid', 'paige', 'paint', 'pair', 'pajama', 'pale', 'palette', 'pan', 'panel', 'pant', 'paper', 'par', 'paris', 'parka', 'part', 'partake', 'particular', 'particularly', 'partum', 'partway', 'party', 'pas', 'pass', 'past', 'pasty', 'patch', 'patten', 'patter', 'pattern', 'pay', 'peach', 'peachy', 'peacock', 'pear', 'pearl', 'peasant', 'peek', 'peekaboo', 'peeve', 'peittes', 'pencil', 'people', 'peple', 'peplum', 'per', 'percent', 'percentage', 'perception', 'perfect', 'perfection', 'perfectly', 'perfume', 'perhaps', 'periwinkle', 'person', 'personal', 'personally', 'perspective', 'peter', 'petite', 'petticoat', 'phot', 'photo', 'photograph', 'photographer', 'photoshoot', 'physique', 'pic', 'pick', 'picked', 'picky', 'picture', 'piece', 'pilcr', 'pilcro', 'pile', 'pill', 'pilled', 'pilling', 'pillowcase', 'pillowy', 'pin', 'pinch', 'pink', 'pinstripe', 'pinstriped', 'pip', 'pit', 'place', 'placement', 'placket', 'plaform', 'plaid', 'plain', 'plan', 'planning', 'plastic', 'plasticy', 'play', 'player', 'pleasant', 'pleasantly', 'please', 'pleased', 'pleat', 'plenty', 'plum', 'plunge', 'plus', 'plush', 'pocket', 'point', 'pointy', 'poke', 'polished', 'polite', 'polka', 'poly', 'polyester', 'polyestery', 'poncho', 'ponder', 'ponte', 'pooch', 'poofed', 'poofy', 'pool', 'poolside', 'poor', 'poorly', 'pop', 'poplin', 'popped', 
'poppy', 'popular', 'portion', 'portrayed', 'portrays', 'pose', 'position', 'positive', 'posse', 'possibility', 'possible', 'possibly', 'post', 'postpartum', 'potato', 'potential', 'potentially', 'pouch', 'pouf', 'pound', 'pour', 'powdery', 'practical', 'practically', 'practice', 'prairie', 'pre', 'prefectly', 'prefer', 'preference', 'pregnancy', 'pregnant', 'prepared', 'preppy', 'press', 'prettier', 'prettiness', 'pretty', 'prevent', 'previous', 'price', 'pricepoint', 'pricey', 'princess', 'print', 'prior', 'priority', 'pro', 'prob', 'probably', 'problem', 'process', 'proclaim', 'product', 'professional', 'profile', 'project', 'promise', 'promising', 'promptly', 'prone', 'pronounce', 'proof', 'proofed', 'properly', 'proportion', 'proportional', 'proportionally', 'protect', 'protrude', 'provide', 'prudish', 'public', 'pucker', 'puff', 'puffier', 'puffy', 'pull', 'puller', 'pullover', 'punk', 'puppy', 'purchase', 'purchaser', 'purchasing', 'pure', 'purple', 'purportedly', 'push', 'put', 'puttin', 'quad', 'quadricep', 'quality', 'qualm', 'question', 'quickly', 'quilt', 'quirkiness', 'quirky', 'quite', 'quot', 'racerback', 'rack', 'rag', 'rail', 'raise', 'ran', 'random', 'range', 'ranna', 'rare', 'rarity', 'rather', 'rave', 'raw', 'rayon', 'rceived', 'rea', 'reach', 'read', 'reading', 'readjustment', 'ready', 'real', 'reality', 'realize', 'reallly', 'really', 'rear', 'reason', 'reasonably', 'recei', 'receive', 'recent', 'recently', 'recieved', 'recognize', 'recommend', 'record', 'recreate', 'recurrent', 'red', 'redeem', 'redo', 'reduce', 'reese', 'ref', 'reference', 'refine', 'refresh', 'refuse', 'regain', 'regard', 'register', 'regret', 'regrettably', 'regular', 'rehearsal', 'reinforce', 'reinforces', 'reject', 'relatively', 'relaxed', 'relazation', 'relegate', 'reliable', 'reliant', 'relieve', 'reluctantly', 'remain', 'remedied', 'remember', 'remind', 'reminder', 'reminds', 'reminiscent', 'remotely', 'removable', 'remove', 'renaissance', 'reorder', 'repackaged', 
'repair', 'repeat', 'replace', 'replacement', 'report', 'represent', 'require', 'resembles', 'reservation', 'resewn', 'resist', 'rest', 'restock', 'restretched', 'restroom', 'result', 'retailer', 'retro', 'return', 'returnable', 'reveal', 'reveiws', 'reversible', 'review', 'reviewer', 'rib', 'ribbed', 'ribbon', 'rich', 'richly', 'rid', 'ride', 'ridiculous', 'ridiculously', 'right', 'rigid', 'rinse', 'rip', 'ripped', 'ripple', 'rise', 'risk', 'risky', 'robe', 'rock', 'rocker', 'rode', 'roll', 'romantic', 'romper', 'room', 'roomier', 'roomy', 'rooster', 'rough', 'round', 'rub', 'ruching', 'ruffle', 'rug', 'ruin', 'rumple', 'run', 'runner', 'runway', 'rust', 'sack', 'sacklike', 'sacrifice', 'sad', 'saddle', 'sadie', 'sadly', 'safe', 'sag', 'sake', 'sale', 'salesperson', 'salmon', 'salon', 'sample', 'sand', 'sandal', 'sans', 'sash', 'sassy', 'sat', 'satin', 'saturday', 'save', 'savvy', 'saw', 'say', 'scallop', 'scar', 'scarf', 'school', 'scoop', 'scrap', 'scratch', 'scratchiness', 'scratchy', 'scream', 'scroll', 'scuba', 'seam', 'seamstress', 'search', 'season', 'seasonal', 'seat', 'seater', 'second', 'secondly', 'section', 'secure', 'seductive', 'see', 'seem', 'sel', 'seldom', 'selection', 'self', 'sell', 'semi', 'send', 'sense', 'sensitive', 'sent', 'separate', 'september', 'sequin', 'serious', 'seriously', 'service', 'set', 'several', 'sevigny', 'sew', 'sewer', 'sewn', 'sexiness', 'sexy', 'shabby', 'shade', 'shadow', 'shaggy', 'shake', 'shame', 'shape', 'shapeless', 'sharkbite', 'sharp', 'shear', 'sheath', 'shed', 'sheen', 'sheer', 'sheerness', 'sheet', 'shelf', 'shell', 'shift', 'shimmer', 'shimmery', 'shin', 'shiny', 'ship', 'shipping', 'shirt', 'shirter', 'shock', 'shoe', 'shook', 'shop', 'shopper', 'shopping', 'short', 'shortcoming', 'shorten', 'shorter', 'shortest', 'shortness', 'shorty', 'shot', 'shoulder', 'show', 'shower', 'showy', 'shrank', 'shrink', 'shrinkage', 'shrug', 'shrunk', 'shrunken', 'shut', 'shy', 'sice', 'side', 'sienna', 'sight', 'significant', 
'significantly', 'silhouette', 'silk', 'silky', 'silly', 'silver', 'similar', 'simple', 'simpleness', 'simply', 'since', 'sincerely', 'single', 'sipper', 'sister', 'sit', 'site', 'sits', 'situation', 'six', 'size', 'skater', 'skeptical', 'skewed', 'skim', 'skin', 'skinny', 'skir', 'skirt', 'skort', 'sky', 'slack', 'slate', 'sleek', 'sleep', 'sleepwear', 'sleeve', 'sleeved', 'sleeveless', 'slender', 'slide', 'slight', 'slightest', 'slightly', 'slim', 'slimmer', 'slimming', 'slinky', 'slip', 'slit', 'slither', 'sloppy', 'slouch', 'slouchy', 'slub', 'slvs', 'sma', 'smae', 'smaill', 'small', 'smart', 'smear', 'smell', 'smile', 'smitten', 'smock', 'smoky', 'smooth', 'smoothly', 'snag', 'snake', 'snakeskin', 'snap', 'snatch', 'snowy', 'snug', 'snuggle', 'snugly', 'social', 'sock', 'sofa', 'soft', 'soften', 'softer', 'softest', 'softly', 'softness', 'solid', 'solve', 'solves', 'someday', 'somehow', 'someone', 'something', 'sometimes', 'somewhat', 'somewhere', 'son', 'soo', 'soon', 'sooner', 'sooo', 'soooo', 'sooooo', 'soooooo', 'sophisticated', 'sorbet', 'sorely', 'sorry', 'sort', 'soul', 'source', 'southeast', 'space', 'spain', 'span', 'spandex', 'spare', 'sparkle', 'sparkly', 'speak', 'special', 'specific', 'speckle', 'spectacular', 'spend', 'spending', 'spent', 'spill', 'splash', 'split', 'splotchy', 'splurge', 'spoiler', 'sport', 'sporty', 'spot', 'spring', 'springy', 'square', 'squat', 'squeeze', 'squirm', 'squish', 'staff', 'stage', 'stain', 'stalk', 'stamp', 'stand', 'standard', 'standout', 'standup', 'staple', 'star', 'starch', 'start', 'starter', 'state', 'statement', 'static', 'stats', 'stature', 'stay', 'stayed', 'steal', 'steam', 'steel', 'steep', 'step', 'stet', 'stevie', 'stick', 'sticky', 'stiff', 'stiffer', 'stiffness', 'still', 'stilt', 'stitch', 'stitchwork', 'stock', 'stomach', 'stone', 'stood', 'stop', 'store', 'straight', 'straighter', 'strand', 'strange', 'strangely', 'stranger', 'strap', 'strapless', 'streak', 'street', 'stress', 'stretch', 
'stretchy', 'stright', 'strike', 'string', 'strip', 'stripe', 'strong', 'struck', 'structure', 'struggle', 'stuck', 'stuff', 'stumble', 'stun', 'sturdy', 'style', 'stylish', 'stylist', 'stylize', 'subpar', 'substantial', 'subtle', 'successfully', 'suck', 'sudden', 'suede', 'suffer', 'suggest', 'suggestion', 'sui', 'suit', 'suitable', 'summary', 'summer', 'summery', 'sumo', 'sun', 'sunday', 'sundry', 'sunlight', 'super', 'superior', 'supervisor', 'support', 'supportive', 'suppose', 'sure', 'surface', 'surpass', 'surprise', 'surprised', 'surprisingly', 'surrender', 'suspect', 'suuuuper', 'swallow', 'swap', 'sweat', 'sweater', 'sweatshirt', 'sweep', 'sweet', 'swept', 'swim', 'swimsuit', 'swing', 'swingy', 'swtr', 'symbol', 'symmetrical', 'synthetic', 'tab', 'tablecloth', 'tacked', 'tacky', 'tad', 'tag', 'tail', 'tailor', 'take', 'talk', 'tall', 'taller', 'tallest', 'tan', 'tank', 'taper', 'tassel', 'taste', 'tat', 'taupe', 'teach', 'teacher', 'teal', 'tear', 'technically', 'tee', 'teh', 'tell', 'temp', 'tempt', 'ten', 'tencel', 'tend', 'tendency', 'tends', 'tennys', 'tent', 'term', 'terre', 'terrible', 'terribly', 'test', 'tex', 'texas', 'text', 'texture', 'textured', 'thank', 'thanks', 'thats', 'theme', 'ther', 'therefore', 'thermal', 'thick', 'thicker', 'thickness', 'thigh', 'thin', 'thing', 'think', 'thinner', 'thinnest', 'third', 'thirty', 'tho', 'though', 'thought', 'thread', 'three', 'threw', 'thrill', 'throughout', 'throw', 'thru', 'thus', 'ticket', 'tie', 'tiered', 'tigh', 'tight', 'tighten', 'tighter', 'tightly', 'tightness', 'tights', 'till', 'time', 'timeless', 'tinselly', 'tiny', 'tire', 'title', 'today', 'toget', 'together', 'told', 'tolerance', 'tomato', 'tomorrow', 'ton', 'tone', 'tooks', 'top', 'tore', 'torn', 'torso', 'toss', 'total', 'totally', 'touch', 'tough', 'towards', 'town', 'toy', 'tracy', 'traditional', 'trans', 'transformer', 'transition', 'transitional', 'transparency', 'transparent', 'trashy', 'travel', 'trend', 'trendy', 'trhrow', 
'triangle', 'trick', 'trickier', 'tricky', 'trim', 'trip', 'trouble', 'trouser', 'true', 'truly', 'trumped', 'trunk', 'trust', 'try', 'tshirt', 'tube', 'tuck', 'tucked', 'tucker', 'tug', 'tulip', 'tulle', 'tum', 'tumble', 'tummy', 'tunic', 'turn', 'turquoise', 'turtleneck', 'tweak', 'twice', 'twig', 'twill', 'twin', 'twist', 'two', 'type', 'typical', 'typically', 'ugliest', 'ugly', 'uhm', 'ultimately', 'ultra', 'ummmmm', 'umph', 'unbearably', 'unbuttoned', 'unclear', 'uncomfortable', 'uncomfortably', 'undecided', 'underarm', 'underarms', 'undergarment', 'undergrad', 'underneath', 'underrated', 'underside', 'underslip', 'understand', 'understated', 'understatement', 'understood', 'underwear', 'underwire', 'undo', 'undone', 'undoubtedly', 'uneven', 'unevenly', 'unexpected', 'unfinished', 'unfitted', 'unflattering', 'unfold', 'unforgiving', 'unfortunate', 'unfortunately', 'unfrotuantely', 'unhappy', 'uniform', 'unify', 'unimpressive', 'unique', 'uniquely', 'unkept', 'unless', 'unlike', 'unlined', 'unnatural', 'unnoticable', 'unpleasant', 'unravel', 'unsee', 'unsnapped', 'unstructured', 'unsure', 'unusual', 'unusually', 'unwashed', 'unwearable', 'unworn', 'unzipped', 'update', 'upholstery', 'upon', 'upper', 'ups', 'upset', 'upside', 'usa', 'use', 'usual', 'usually', 'vacation', 'vain', 'valentine', 'value', 'vanity', 'variety', 'various', 'vast', 'velvet', 'velvety', 'venice', 'ver', 'verdict', 'verge', 'versatile', 'versatility', 'version', 'vertical', 'vest', 'vibe', 'vibrant', 'victorian', 'view', 'vintage', 'vinyasa', 'virtually', 'viscose', 'visible', 'visual', 'vivid', 'voila', 'volume', 'waaaaaay', 'waaaaay', 'waas', 'waiat', 'waist', 'waistband', 'waisted', 'waistline', 'wait', 'walk', 'want', 'wardrobe', 'warm', 'warmer', 'warmth', 'warn', 'warp', 'wary', 'wasa', 'wash', 'washable', 'waste', 'watch', 'water', 'waterproof', 'waver', 'wavy', 'way', 'weak', 'wear', 'wearable', 'wearer', 'weather', 'weave', 'web', 'webbed', 'website', 'wedding', 'wedge', 'week', 
'weekend', 'weigh', 'weighs', 'weight', 'weighty', 'weird', 'weirdly', 'well', 'western', 'wet', 'whacky', 'whatever', 'whatsoever', 'whe', 'whereas', 'whether', 'whilst', 'whim', 'whimsical', 'whit', 'white', 'whole', 'whose', 'whte', 'wide', 'widen', 'widens', 'wider', 'width', 'wiggle', 'wil', 'win', 'wind', 'wine', 'wing', 'winner', 'winter', 'wise', 'wish', 'wishful', 'within', 'without', 'withstand', 'wks', 'wld', 'woke', 'woman', 'wonder', 'wonderful', 'wonderfully', 'wonky', 'wont', 'wool', 'woolite', 'wooly', 'wor', 'word', 'wore', 'work', 'workmanship', 'workplace', 'world', 'worn', 'worried', 'worry', 'worth', 'worthy', 'wou', 'would', 'woven', 'wow', 'wrap', 'wrestler', 'wrestling', 'wrikling', 'wrinkle', 'wrinkly', 'wrist', 'write', 'wrong', 'wth', 'wwear', 'xsmall', 'xsp', 'xspetite', 'xxl', 'xxsp', 'xxxl', 'yarn', 'yay', 'yeah', 'year', 'yell', 'yellow', 'yep', 'yes', 'yesterday', 'yet', 'yikes', 'yoga', 'young', 'youthful', 'zag', 'zero', 'zig', 'zip', 'zipped', 'zipper', 'zoom']
csr_mat.toarray().shape
(1258, 3003)
# Hold out 20% of the rows for testing. Stratifying on Target keeps the
# low/high class ratio the same in both splits; random_state makes the split
# reproducible.
X_train , X_test , y_train , y_test = train_test_split(csr_mat.toarray() , reviews['Target'] ,test_size=0.2,random_state=10,stratify=reviews['Target'])
X_train.shape
(1006, 3003)
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix
# Baseline classifier: multinomial Naive Bayes fitted on the training split.
clf = MultinomialNB()
clf.fit(X_train,y_train)
MultinomialNB()
# Predict the held-out labels. In scikit-learn's confusion matrix the rows are
# the true classes and the columns are the predicted classes.
y_pred = clf.predict(X_test)
confusion_matrix(y_test,y_pred)
array([[ 32, 66],
[ 2, 152]], dtype=int64)
# Heatmap of the confusion matrix (rows: true class, columns: predicted class).
# fmt='d' prints each cell as a plain integer; seaborn's default '.2g'
# annotation format would render a count such as 152 as 1.5e+02.
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d')
<AxesSubplot:>
clf.score(X_train,y_train)
0.8389662027833003
Accuracy of training set.
clf.score(X_test,y_test)
0.7301587301587301
Accuracy of testing set.
from sklearn.linear_model import LogisticRegression
# Second model: logistic regression with default settings. This rebinds `clf`,
# so the code below (scores, LIME, ELI5) uses this model, not the Naive Bayes one.
clf = LogisticRegression()
clf.fit(X_train , y_train)
LogisticRegression()
# Predict the held-out labels with the logistic-regression model and plot the
# confusion matrix (rows: true class, columns: predicted class).
y_pred = clf.predict(X_test)
# fmt='d' prints each cell as a plain integer; seaborn's default '.2g'
# annotation format would show three-digit counts in scientific notation.
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d')
<AxesSubplot:>
clf.score(X_train,y_train)
0.9363817097415507
Accuracy of training set.
clf.score(X_test,y_test)
0.8531746031746031
Accuracy of testing set.
LIME (Local Interpretable Model-agnostic Explanations) is a novel explanation technique that explains the predictions of any classifier in an interpretable and faithful manner by learning an interpretable model locally around the prediction.
import lime
from lime import lime_tabular
# Build a LIME explainer from the training matrix so that individual
# predictions can be explained in terms of the vocabulary terms.
#
# Assuming `tfidf` is a scikit-learn vectorizer: get_feature_names() was
# deprecated in scikit-learn 1.0 in favour of get_feature_names_out() and
# removed in 1.2, so use whichever accessor this installation provides.
_vocab_getter = getattr(tfidf, 'get_feature_names_out', None) or tfidf.get_feature_names
explainer = lime_tabular.LimeTabularExplainer(
    training_data=np.array(X_train),
    feature_names=list(_vocab_getter()),
    class_names=[0, 1],  # 0 = low rating (1-2 stars), 1 = high rating (4-5 stars)
    mode='classification',  # LIME's default, stated explicitly
)
X_test[0]
array([0., 0., 0., ..., 0., 0., 0.])
clf.predict_proba(X_test)
array([[0.65615791, 0.34384209],
[0.32452824, 0.67547176],
[0.68559401, 0.31440599],
[0.434553 , 0.565447 ],
[0.76588693, 0.23411307],
[0.19400476, 0.80599524],
[0.5091309 , 0.4908691 ],
[0.48748099, 0.51251901],
[0.53428503, 0.46571497],
[0.34903387, 0.65096613],
[0.40684657, 0.59315343],
[0.50638579, 0.49361421],
[0.19939735, 0.80060265],
[0.16381572, 0.83618428],
[0.18972461, 0.81027539],
[0.524078 , 0.475922 ],
[0.07999835, 0.92000165],
[0.73190998, 0.26809002],
[0.7754521 , 0.2245479 ],
[0.60025212, 0.39974788],
[0.77658531, 0.22341469],
[0.22513345, 0.77486655],
[0.41693425, 0.58306575],
[0.22105719, 0.77894281],
[0.74196046, 0.25803954],
[0.66449486, 0.33550514],
[0.37813496, 0.62186504],
[0.50674027, 0.49325973],
[0.46534128, 0.53465872],
[0.17108843, 0.82891157],
[0.45773239, 0.54226761],
[0.3838299 , 0.6161701 ],
[0.73101019, 0.26898981],
[0.55552959, 0.44447041],
[0.22507024, 0.77492976],
[0.27757639, 0.72242361],
[0.19123891, 0.80876109],
[0.30742107, 0.69257893],
[0.51689956, 0.48310044],
[0.42805148, 0.57194852],
[0.2950952 , 0.7049048 ],
[0.34612205, 0.65387795],
[0.26033452, 0.73966548],
[0.26617644, 0.73382356],
[0.47089747, 0.52910253],
[0.44387787, 0.55612213],
[0.17816035, 0.82183965],
[0.51894639, 0.48105361],
[0.52250911, 0.47749089],
[0.24009385, 0.75990615],
[0.58283794, 0.41716206],
[0.16760362, 0.83239638],
[0.33408349, 0.66591651],
[0.31650287, 0.68349713],
[0.14739525, 0.85260475],
[0.54054345, 0.45945655],
[0.31334542, 0.68665458],
[0.21955791, 0.78044209],
[0.17300293, 0.82699707],
[0.21481268, 0.78518732],
[0.55768464, 0.44231536],
[0.35690622, 0.64309378],
[0.15929302, 0.84070698],
[0.13083233, 0.86916767],
[0.53780179, 0.46219821],
[0.11189336, 0.88810664],
[0.47287762, 0.52712238],
[0.59493923, 0.40506077],
[0.38954461, 0.61045539],
[0.16229341, 0.83770659],
[0.54440496, 0.45559504],
[0.13237354, 0.86762646],
[0.41172652, 0.58827348],
[0.58733516, 0.41266484],
[0.48977887, 0.51022113],
[0.14851141, 0.85148859],
[0.39415283, 0.60584717],
[0.11170398, 0.88829602],
[0.46814067, 0.53185933],
[0.24610188, 0.75389812],
[0.24158341, 0.75841659],
[0.21851232, 0.78148768],
[0.29503689, 0.70496311],
[0.72483538, 0.27516462],
[0.16481463, 0.83518537],
[0.58965323, 0.41034677],
[0.2402972 , 0.7597028 ],
[0.69284181, 0.30715819],
[0.19384593, 0.80615407],
[0.51020877, 0.48979123],
[0.39309787, 0.60690213],
[0.36846354, 0.63153646],
[0.24760855, 0.75239145],
[0.12500739, 0.87499261],
[0.22985003, 0.77014997],
[0.30570314, 0.69429686],
[0.63316702, 0.36683298],
[0.58481463, 0.41518537],
[0.43441793, 0.56558207],
[0.31756497, 0.68243503],
[0.42665885, 0.57334115],
[0.48538804, 0.51461196],
[0.52088974, 0.47911026],
[0.11821947, 0.88178053],
[0.48208288, 0.51791712],
[0.752249 , 0.247751 ],
[0.27475294, 0.72524706],
[0.52525044, 0.47474956],
[0.60637423, 0.39362577],
[0.37589181, 0.62410819],
[0.13162797, 0.86837203],
[0.09980781, 0.90019219],
[0.2965506 , 0.7034494 ],
[0.70669619, 0.29330381],
[0.56805063, 0.43194937],
[0.44508376, 0.55491624],
[0.09605335, 0.90394665],
[0.23534385, 0.76465615],
[0.37133501, 0.62866499],
[0.74376124, 0.25623876],
[0.34085272, 0.65914728],
[0.24685219, 0.75314781],
[0.54098515, 0.45901485],
[0.77613028, 0.22386972],
[0.23881697, 0.76118303],
[0.48709517, 0.51290483],
[0.27054966, 0.72945034],
[0.19268588, 0.80731412],
[0.43814213, 0.56185787],
[0.53654558, 0.46345442],
[0.24897833, 0.75102167],
[0.26886223, 0.73113777],
[0.23842527, 0.76157473],
[0.57892987, 0.42107013],
[0.11164888, 0.88835112],
[0.1767729 , 0.8232271 ],
[0.30713636, 0.69286364],
[0.18424658, 0.81575342],
[0.29846533, 0.70153467],
[0.39305665, 0.60694335],
[0.16075625, 0.83924375],
[0.24599316, 0.75400684],
[0.48754697, 0.51245303],
[0.3234569 , 0.6765431 ],
[0.41810146, 0.58189854],
[0.60893782, 0.39106218],
[0.27298042, 0.72701958],
[0.17758912, 0.82241088],
[0.24985734, 0.75014266],
[0.50245022, 0.49754978],
[0.5315177 , 0.4684823 ],
[0.3841542 , 0.6158458 ],
[0.32436407, 0.67563593],
[0.74216008, 0.25783992],
[0.36543617, 0.63456383],
[0.5876 , 0.4124 ],
[0.40092439, 0.59907561],
[0.36900194, 0.63099806],
[0.31643151, 0.68356849],
[0.56833595, 0.43166405],
[0.63759888, 0.36240112],
[0.6398871 , 0.3601129 ],
[0.09853028, 0.90146972],
[0.17595538, 0.82404462],
[0.21668153, 0.78331847],
[0.15509925, 0.84490075],
[0.44544654, 0.55455346],
[0.24603645, 0.75396355],
[0.23667551, 0.76332449],
[0.13808422, 0.86191578],
[0.24287116, 0.75712884],
[0.46604283, 0.53395717],
[0.2965022 , 0.7034978 ],
[0.775715 , 0.224285 ],
[0.78773105, 0.21226895],
[0.72590591, 0.27409409],
[0.19134566, 0.80865434],
[0.16757159, 0.83242841],
[0.09439242, 0.90560758],
[0.30495635, 0.69504365],
[0.15384837, 0.84615163],
[0.25254938, 0.74745062],
[0.50714714, 0.49285286],
[0.21691129, 0.78308871],
[0.10060472, 0.89939528],
[0.69887132, 0.30112868],
[0.56359632, 0.43640368],
[0.2260143 , 0.7739857 ],
[0.51465673, 0.48534327],
[0.50796234, 0.49203766],
[0.60181432, 0.39818568],
[0.59771101, 0.40228899],
[0.45802026, 0.54197974],
[0.14321001, 0.85678999],
[0.08693376, 0.91306624],
[0.36423031, 0.63576969],
[0.56836755, 0.43163245],
[0.10457189, 0.89542811],
[0.17869365, 0.82130635],
[0.21157134, 0.78842866],
[0.57585518, 0.42414482],
[0.15172122, 0.84827878],
[0.24651024, 0.75348976],
[0.42590072, 0.57409928],
[0.65498229, 0.34501771],
[0.40495087, 0.59504913],
[0.20557604, 0.79442396],
[0.40879487, 0.59120513],
[0.37773025, 0.62226975],
[0.4050312 , 0.5949688 ],
[0.41595486, 0.58404514],
[0.53682182, 0.46317818],
[0.38855576, 0.61144424],
[0.05049182, 0.94950818],
[0.23656598, 0.76343402],
[0.23709339, 0.76290661],
[0.57963312, 0.42036688],
[0.39945008, 0.60054992],
[0.27435674, 0.72564326],
[0.6346527 , 0.3653473 ],
[0.22269262, 0.77730738],
[0.11985603, 0.88014397],
[0.4283686 , 0.5716314 ],
[0.45001149, 0.54998851],
[0.60809132, 0.39190868],
[0.26133517, 0.73866483],
[0.78526104, 0.21473896],
[0.38178418, 0.61821582],
[0.47643064, 0.52356936],
[0.62910182, 0.37089818],
[0.67098748, 0.32901252],
[0.26199582, 0.73800418],
[0.18612468, 0.81387532],
[0.22298299, 0.77701701],
[0.39005566, 0.60994434],
[0.57177733, 0.42822267],
[0.24422943, 0.75577057],
[0.25874461, 0.74125539],
[0.6796355 , 0.3203645 ],
[0.69610444, 0.30389556],
[0.37868406, 0.62131594],
[0.32224457, 0.67775543],
[0.1140753 , 0.8859247 ],
[0.18855308, 0.81144692],
[0.50240472, 0.49759528],
[0.62274441, 0.37725559],
[0.3973454 , 0.6026546 ],
[0.47530461, 0.52469539],
[0.59126401, 0.40873599],
[0.36295942, 0.63704058],
[0.32953028, 0.67046972],
[0.45983118, 0.54016882]])
# Explain the prediction for the first test row, limited to 6 features.
# predict_fn must return class probabilities, hence clf.predict_proba.
exp = explainer.explain_instance(
data_row=X_test[0],
predict_fn=clf.predict_proba,
num_features=6
)
exp.show_in_notebook(show_table=True)
# Same for test row 1; num_features is left at LIME's default here.
exp = explainer.explain_instance(
data_row=X_test[1],
predict_fn=clf.predict_proba
)
exp.show_in_notebook(show_table=True)
# And for test row 5, again with the default number of features.
exp = explainer.explain_instance(
data_row=X_test[5],
predict_fn=clf.predict_proba
)
exp.show_in_notebook(show_table=True)
ELI5 is a Python package used to inspect ML classifiers and give an explanation of their predictions. It is popularly used to debug models such as sklearn regressors and classifiers, XGBoost, CatBoost, Keras, etc.
import eli5 as eli
eli.explain_weights(clf)
y=1 top features
| Weight? | Feature |
|---|---|
| +2.508 | x1154 |
| +2.292 | x1475 |
| +1.975 | x512 |
| +1.786 | x235 |
| +1.701 | x1839 |
| +1.605 | x1498 |
| +1.525 | x1353 |
| +1.382 | x1681 |
| +1.291 | x1012 |
| +1.290 | x514 |
| +1.259 | x947 |
| +1.246 | x998 |
| … 1436 more positive … | |
| … 1245 more negative … | |
| -1.212 | x168 |
| -1.444 | x163 |
| -1.490 | x2637 |
| -1.590 | x2781 |
| -1.717 | x2785 |
| -1.822 | x425 |
| -2.377 | x739 |
| -2.971 | x2125 |
eli.explain_prediction(clf , X_test[1])
y=1 (probability 0.675, score 0.733) top features
| Contribution? | Feature |
|---|---|
| +0.189 | x1154 |
| +0.153 | <BIAS> |
| +0.137 | x947 |
| +0.112 | x1525 |
| +0.111 | x2930 |
| +0.109 | x2791 |
| +0.101 | x2836 |
| +0.099 | x2352 |
| +0.074 | x1863 |
| +0.065 | x24 |
| +0.047 | x1750 |
| +0.036 | x1480 |
| +0.034 | x2862 |
| +0.025 | x2883 |
| +0.025 | x2141 |
| +0.021 | x1545 |
| +0.020 | x2604 |
| +0.019 | x653 |
| +0.016 | x2682 |
| +0.015 | x1705 |
| +0.013 | x1205 |
| +0.012 | x2346 |
| +0.005 | x1143 |
| +0.001 | x1094 |
| -0.002 | x374 |
| -0.003 | x824 |
| -0.006 | x481 |
| -0.010 | x812 |
| -0.021 | x1884 |
| -0.021 | x2224 |
| -0.026 | x1065 |
| -0.035 | x2284 |
| -0.037 | x1744 |
| -0.040 | x2017 |
| -0.050 | x2345 |
| -0.060 | x1482 |
| -0.066 | x351 |
| -0.067 | x2013 |
| -0.068 | x2819 |
| -0.192 | x882 |
# Explain the prediction for test row 1 again, this time with the real
# vocabulary terms instead of the anonymous x<N> feature ids, and with each
# feature's value in the row shown next to its contribution.
#
# Assuming `tfidf` is a scikit-learn vectorizer: get_feature_names() was
# removed in scikit-learn 1.2 in favour of get_feature_names_out(), so use
# whichever accessor this installation provides.
_vocab_getter = getattr(tfidf, 'get_feature_names_out', None) or tfidf.get_feature_names
eli.show_prediction(clf, X_test[1],
                    feature_names=list(_vocab_getter()),
                    show_feature_values=True)
y=1 (probability 0.675, score 0.733) top features
| Contribution? | Feature | Value |
|---|---|---|
| +0.189 | great | 0.075 |
| +0.153 | <BIAS> | 1.000 |
| +0.137 | fall | 0.109 |
| +0.112 | many | 0.125 |
| +0.111 | winter | 0.135 |
| +0.109 | unique | 0.122 |
| +0.101 | versatile | 0.146 |
| +0.099 | slightly | 0.126 |
| +0.074 | piece | 0.231 |
| +0.065 | add | 0.131 |
| +0.047 | option | 0.161 |
| +0.036 | long | 0.098 |
| +0.034 | wait | 0.135 |
| +0.025 | wear | 0.067 |
| +0.025 | right | 0.102 |
| +0.021 | may | 0.127 |
| +0.020 | tee | 0.135 |
| +0.019 | day | 0.118 |
| +0.016 | top | 0.065 |
| +0.015 | noticeable | 0.192 |
| +0.013 | help | 0.149 |
| +0.012 | sleeved | 0.200 |
| +0.005 | gorgeous | 0.120 |
| +0.001 | fyi | 0.192 |
| -0.002 | cannot | 0.156 |
| -0.003 | effect | 0.186 |
| -0.006 | close | 0.148 |
| -0.010 | ease | 0.192 |
| -0.021 | plain | 0.176 |
| -0.021 | see | 0.096 |
| -0.026 | frayed | 0.186 |
| -0.035 | short | 0.093 |
| -0.037 | opening | 0.172 |
| -0.040 | quality | 0.093 |
| -0.050 | sleeve | 0.101 |
| -0.060 | look | 0.056 |
| -0.066 | button | 0.127 |
| -0.067 | put | 0.124 |
| -0.068 | use | 0.131 |
| -0.192 | even | 0.182 |
import gensim
from gensim import corpora
# libraries for visualization
import pyLDAvis
import pyLDAvis.gensim_models as gensimvis
# One token list per cleaned review, in the same order as the rows of `reviews`.
tokenized_reviews = list(map(nltk.word_tokenize, reviews['Clean Text']))
len(tokenized_reviews)
1258
# Map every distinct token to an integer id.
dictionary = corpora.Dictionary(tokenized_reviews)
# Bag-of-words corpus: one list of (token_id, count) pairs per review,
# in the same order as tokenized_reviews.
doc_term_matrix = [dictionary.doc2bow(rev) for rev in tokenized_reviews]
# Alias for gensim's LDA model class.
LDA = gensim.models.ldamodel.LdaModel
# Train a 10-topic LDA model on the bag-of-words corpus.
# random_state fixes the seed so the topics are reproducible; chunksize is the
# number of documents per training chunk, passes the number of sweeps over the
# corpus, and iterations the per-document inference iteration cap.
lda_model = LDA(corpus=doc_term_matrix, id2word=dictionary, num_topics=10, random_state=100,
chunksize=1000, passes=50,iterations=100)
lda_model.print_topics()
[(0, '0.034*"dress" + 0.032*"look" + 0.020*"like" + 0.019*"fit" + 0.018*"top" + 0.017*"fabric" + 0.015*"try" + 0.013*"make" + 0.013*"love" + 0.012*"picture"'), (1, '0.021*"dress" + 0.019*"size" + 0.016*"wear" + 0.016*"fit" + 0.015*"sweater" + 0.015*"small" + 0.015*"like" + 0.014*"look" + 0.011*"love" + 0.011*"would"'), (2, '0.025*"look" + 0.020*"top" + 0.020*"color" + 0.017*"love" + 0.017*"shirt" + 0.016*"fit" + 0.016*"size" + 0.015*"great" + 0.014*"wear" + 0.012*"small"'), (3, '0.037*"pant" + 0.030*"dress" + 0.027*"blouse" + 0.026*"fit" + 0.025*"wash" + 0.020*"love" + 0.018*"leg" + 0.017*"great" + 0.015*"absolutely" + 0.015*"dry"'), (4, '0.024*"one" + 0.021*"great" + 0.015*"right" + 0.013*"really" + 0.012*"pretty" + 0.011*"get" + 0.011*"neck" + 0.011*"kind" + 0.011*"jacket" + 0.010*"work"'), (5, '0.027*"size" + 0.027*"top" + 0.025*"fit" + 0.017*"look" + 0.016*"like" + 0.016*"fabric" + 0.015*"love" + 0.013*"would" + 0.013*"little" + 0.012*"small"'), (6, '0.021*"jacket" + 0.018*"order" + 0.017*"well" + 0.015*"fit" + 0.014*"long" + 0.014*"one" + 0.014*"look" + 0.014*"size" + 0.014*"suit" + 0.011*"big"'), (7, '0.043*"love" + 0.026*"great" + 0.023*"color" + 0.021*"fit" + 0.015*"jacket" + 0.015*"wear" + 0.014*"denim" + 0.013*"size" + 0.012*"little" + 0.011*"piece"'), (8, '0.035*"dress" + 0.016*"color" + 0.013*"size" + 0.012*"nice" + 0.011*"order" + 0.010*"pant" + 0.009*"wear" + 0.009*"like" + 0.008*"make" + 0.008*"feel"'), (9, '0.029*"button" + 0.026*"many" + 0.026*"compliment" + 0.025*"top" + 0.025*"receive" + 0.020*"time" + 0.018*"wore" + 0.017*"love" + 0.014*"design" + 0.013*"first"')]
# Visualize the topics interactively with pyLDAvis.
# enable_notebook() makes the prepared visualization render inline in Jupyter.
pyLDAvis.enable_notebook()
vis = gensimvis.prepare(lda_model, doc_term_matrix, dictionary)
# Bare expression so the notebook displays the visualization.
vis
# Topic mixture of the first review as (topic_id, probability) pairs.
# Only some of the 10 topics are returned (see output) -- presumably those
# above gensim's minimum-probability threshold.
bow=dictionary.doc2bow(tokenized_reviews[0])
lda_model.get_document_topics(bow)
[(0, 0.22575486), (1, 0.35032144), (9, 0.40641472)]
# Work with the first topic (id 0).
topic = 0
# Map document position -> weight of `topic` in that document. Documents whose
# topic list does not include `topic` get no entry, so the dict can be
# smaller than the corpus.
topic_1_reviews = {
    doc_pos: weight
    for doc_pos, doc_bow in enumerate(doc_term_matrix)
    for topic_id, weight in lda_model[doc_bow]
    if topic_id == topic
}
len(topic_1_reviews)
514
from heapq import nlargest
# Keys (document positions) of the three largest topic weights, highest first.
res = nlargest(3, topic_1_reviews, key=topic_1_reviews.get)
# Report which documents were picked.
print(f"The top reviews for first topic are {res}")
The top reviews for first topic are [322, 1093, 58]
# `res` holds positions into doc_term_matrix (one entry per row of `reviews`,
# in row order), not index labels, so the rows must be selected by position.
# Label-based .loc only gives the same rows if the index is exactly 0..n-1,
# which is not guaranteed after rows were dropped and filtered earlier on.
reviews.iloc[res]
| Review Text | Rating | Target | Clean Text | |
|---|---|---|---|---|
| 322 | I wanted to love this sweatshirt, but alas, it... | 2 | 0 | want love sweatshirt ala back upon opening unf... |
| 1093 | I like this sweater so much i just bought it i... | 5 | 1 | like sweater much bought second color pleat ma... |
| 58 | Ordered a l and xl, the sleeves were so tight ... | 1 | 0 | order sleeve tight size rest shirt wide loose ... |